In [1]:
import numpy as np
import pandas as pd
import json
from ipywidgets import interact
from bokeh.plotting import figure, show, ColumnDataSource
from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure
output_notebook()


Loading BokehJS ...

In [2]:
#your_local_path="F:/Study/Dashboard/Coffee/"

In [3]:
with open('Cafe.json') as f:
    i=1
    for line in f:
        coffee = pd.DataFrame(json.loads(line))

In [4]:
# coffee = pd.read_json('Cafe1.json')
#coffee = coffee.sample(n=1000)
#coffee.reset_index(drop=True)

In [5]:
stdf = coffee['Monthly Data'].apply(json.loads).apply(pd.Series)
stdf.reset_index(drop=True)
#stdf.head()


Out[5]:
2013:10 2013:11 2013:12 2013:9 2014:1 2014:10 2014:11 2014:12 2014:2 2014:3 ... 2016:8 2016:9 2017:1 2017:2 2017:3 2017:4 2017:5 2017:6 2017:7 2017:8
0 {'NoOfEmployees': 12.0, 'CostIncurredAtStore':... {'NoOfEmployees': 13.0, 'CostIncurredAtStore':... {'NoOfEmployees': 14.0, 'CostIncurredAtStore':... {'NoOfEmployees': 11.0, 'CostIncurredAtStore':... {'NoOfEmployees': 15.0, 'CostIncurredAtStore':... {'NoOfEmployees': 31.0, 'CostIncurredAtStore':... {'NoOfEmployees': 32.0, 'CostIncurredAtStore':... {'NoOfEmployees': 34.0, 'CostIncurredAtStore':... {'NoOfEmployees': 16.0, 'CostIncurredAtStore':... {'NoOfEmployees': 17.0, 'CostIncurredAtStore':... ... {'NoOfEmployees': 114.0, 'CostIncurredAtStore'... {'NoOfEmployees': 122.0, 'CostIncurredAtStore'... {'NoOfEmployees': 146.0, 'CostIncurredAtStore'... {'NoOfEmployees': 157.0, 'CostIncurredAtStore'... {'NoOfEmployees': 170.0, 'CostIncurredAtStore'... {'NoOfEmployees': 176.0, 'CostIncurredAtStore'... {'NoOfEmployees': 190.0, 'CostIncurredAtStore'... {'NoOfEmployees': 206.0, 'CostIncurredAtStore'... {'NoOfEmployees': 215.0, 'CostIncurredAtStore'... {'NoOfEmployees': 222.0, 'CostIncurredAtStore'...
1 {'NoOfEmployees': 12.0, 'CostIncurredAtStore':... {'NoOfEmployees': 13.0, 'CostIncurredAtStore':... {'NoOfEmployees': 14.0, 'CostIncurredAtStore':... {'NoOfEmployees': 11.0, 'CostIncurredAtStore':... {'NoOfEmployees': 15.0, 'CostIncurredAtStore':... {'NoOfEmployees': 30.0, 'CostIncurredAtStore':... {'NoOfEmployees': 31.0, 'CostIncurredAtStore':... {'NoOfEmployees': 34.0, 'CostIncurredAtStore':... {'NoOfEmployees': 17.0, 'CostIncurredAtStore':... {'NoOfEmployees': 18.0, 'CostIncurredAtStore':... ... {'NoOfEmployees': 147.0, 'CostIncurredAtStore'... {'NoOfEmployees': 160.0, 'CostIncurredAtStore'... {'NoOfEmployees': 212.0, 'CostIncurredAtStore'... {'NoOfEmployees': 223.0, 'CostIncurredAtStore'... {'NoOfEmployees': 239.0, 'CostIncurredAtStore'... {'NoOfEmployees': 257.0, 'CostIncurredAtStore'... {'NoOfEmployees': 268.0, 'CostIncurredAtStore'... {'NoOfEmployees': 293.0, 'CostIncurredAtStore'... {'NoOfEmployees': 304.0, 'CostIncurredAtStore'... {'NoOfEmployees': 317.0, 'CostIncurredAtStore'...
2 {'NoOfEmployees': 12.0, 'CostIncurredAtStore':... {'NoOfEmployees': 13.0, 'CostIncurredAtStore':... {'NoOfEmployees': 14.0, 'CostIncurredAtStore':... {'NoOfEmployees': 11.0, 'CostIncurredAtStore':... {'NoOfEmployees': 15.0, 'CostIncurredAtStore':... {'NoOfEmployees': 28.0, 'CostIncurredAtStore':... {'NoOfEmployees': 30.0, 'CostIncurredAtStore':... {'NoOfEmployees': 32.0, 'CostIncurredAtStore':... {'NoOfEmployees': 16.0, 'CostIncurredAtStore':... {'NoOfEmployees': 17.0, 'CostIncurredAtStore':... ... {'NoOfEmployees': 94.0, 'CostIncurredAtStore':... {'NoOfEmployees': 103.0, 'CostIncurredAtStore'... {'NoOfEmployees': 135.0, 'CostIncurredAtStore'... {'NoOfEmployees': 148.0, 'CostIncurredAtStore'... {'NoOfEmployees': 157.0, 'CostIncurredAtStore'... {'NoOfEmployees': 166.0, 'CostIncurredAtStore'... {'NoOfEmployees': 179.0, 'CostIncurredAtStore'... {'NoOfEmployees': 193.0, 'CostIncurredAtStore'... {'NoOfEmployees': 204.0, 'CostIncurredAtStore'... {'NoOfEmployees': 223.0, 'CostIncurredAtStore'...
3 NaN NaN NaN NaN {'NoOfEmployees': 11.0, 'CostIncurredAtStore':... {'NoOfEmployees': 24.0, 'CostIncurredAtStore':... {'NoOfEmployees': 26.0, 'CostIncurredAtStore':... {'NoOfEmployees': 27.0, 'CostIncurredAtStore':... {'NoOfEmployees': 12.0, 'CostIncurredAtStore':... {'NoOfEmployees': 13.0, 'CostIncurredAtStore':... ... {'NoOfEmployees': 92.0, 'CostIncurredAtStore':... {'NoOfEmployees': 100.0, 'CostIncurredAtStore'... {'NoOfEmployees': 116.0, 'CostIncurredAtStore'... {'NoOfEmployees': 121.0, 'CostIncurredAtStore'... {'NoOfEmployees': 123.0, 'CostIncurredAtStore'... {'NoOfEmployees': 134.0, 'CostIncurredAtStore'... {'NoOfEmployees': 142.0, 'CostIncurredAtStore'... {'NoOfEmployees': 151.0, 'CostIncurredAtStore'... {'NoOfEmployees': 155.0, 'CostIncurredAtStore'... NaN
4 NaN NaN NaN NaN NaN {'NoOfEmployees': 18.0, 'CostIncurredAtStore':... {'NoOfEmployees': 19.0, 'CostIncurredAtStore':... {'NoOfEmployees': 21.0, 'CostIncurredAtStore':... NaN NaN ... {'NoOfEmployees': 72.0, 'CostIncurredAtStore':... {'NoOfEmployees': 80.0, 'CostIncurredAtStore':... {'NoOfEmployees': 109.0, 'CostIncurredAtStore'... {'NoOfEmployees': 117.0, 'CostIncurredAtStore'... {'NoOfEmployees': 125.0, 'CostIncurredAtStore'... {'NoOfEmployees': 136.0, 'CostIncurredAtStore'... {'NoOfEmployees': 149.0, 'CostIncurredAtStore'... {'NoOfEmployees': 160.0, 'CostIncurredAtStore'... {'NoOfEmployees': 163.0, 'CostIncurredAtStore'... {'NoOfEmployees': 174.0, 'CostIncurredAtStore'...
5 NaN NaN NaN NaN NaN NaN {'NoOfEmployees': 11.0, 'CostIncurredAtStore':... {'NoOfEmployees': 12.0, 'CostIncurredAtStore':... NaN NaN ... {'NoOfEmployees': 49.0, 'CostIncurredAtStore':... {'NoOfEmployees': 53.0, 'CostIncurredAtStore':... {'NoOfEmployees': 62.0, 'CostIncurredAtStore':... {'NoOfEmployees': 67.0, 'CostIncurredAtStore':... {'NoOfEmployees': 74.0, 'CostIncurredAtStore':... {'NoOfEmployees': 77.0, 'CostIncurredAtStore':... {'NoOfEmployees': 83.0, 'CostIncurredAtStore':... {'NoOfEmployees': 90.0, 'CostIncurredAtStore':... {'NoOfEmployees': 97.0, 'CostIncurredAtStore':... {'NoOfEmployees': 99.0, 'CostIncurredAtStore':...
6 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... {'NoOfEmployees': 34.0, 'CostIncurredAtStore':... {'NoOfEmployees': 36.0, 'CostIncurredAtStore':... {'NoOfEmployees': 49.0, 'CostIncurredAtStore':... {'NoOfEmployees': 52.0, 'CostIncurredAtStore':... {'NoOfEmployees': 55.0, 'CostIncurredAtStore':... {'NoOfEmployees': 59.0, 'CostIncurredAtStore':... {'NoOfEmployees': 60.0, 'CostIncurredAtStore':... {'NoOfEmployees': 61.0, 'CostIncurredAtStore':... {'NoOfEmployees': 66.0, 'CostIncurredAtStore':... NaN

7 rows × 48 columns


In [6]:
coffee = coffee[['Store ID', 'Name', 'Brand', 'Store Number', 'Phone Number',
       'Ownership Type', 'Street Combined', 'Street 1', 'Street 2', 'Street 3',
       'City', 'State', 'Country', 'Coordinates', 'Latitude', 'Longitude',
       'Start Date', 'Monthly Data']]

In [7]:
#len(coffee.Country.unique())

In [8]:
stdf.columns = range(0,len(stdf.columns))

In [9]:
stdf['City'] = coffee['City']
stdf['Name'] = coffee['Name']
stdf_city = stdf[stdf['City'] == 'Pune']
#len(stdf_city['Name'].unique())

In [10]:
#stdf_city[0].apply(pd.Series)

Cost Incurred vs Revenue Plot for stores in Pune

*Bubble size represents Profits


In [11]:
from bokeh.palettes import Spectral11
from bokeh.models import HoverTool
from bokeh.models import LinearInterpolator, CategoricalColorMapper
from bokeh.models import ColumnDataSource

sss = stdf_city[0].apply(pd.Series)
Profit = sss['Profit']
source = ColumnDataSource(dict(
    x= sss['CostIncurredAtStore'],
    y= sss['RevenueOfStore'],
    profit = sss['Profit'],
    profit_scaled = Profit.apply(lambda x: abs(x/5)),
    stores = stdf_city['Name'],    
    emp = sss['NoOfEmployees'],
        
    ))


def update_m (month):
    sss = stdf_city[month].apply(pd.Series)
    Profit = sss['Profit']
    new_data = dict(
        x= sss['CostIncurredAtStore'],
        y= sss['RevenueOfStore'],
        stores = stdf_city['Name'],    
        emp = sss['NoOfEmployees'],
        profit = Profit,
        profit_scaled = Profit.apply(lambda x: abs(x/5))
        
    )
    source.data = new_data #updating the source data with the newdata i.e data of each year
    p1.title.text = str(month)  #updating the title 
    push_notebook() #push this into chart

size_mapper = LinearInterpolator(     #this is to give size for each data point according to their population
    x=[5, 15],
    y = [3,5]
)

#to give color to each type of data point
color_mapper = CategoricalColorMapper(
   factors = list(stdf_city['Name'].unique()),  #this tells the compiler to color the continents
  palette = Spectral11,)

hover = HoverTool(tooltips = [("Employee Count","@emp"),("Profit","@profit"), ("Revenue","@y"), ("Cost Incurred","@x")], #when u hover mouse on data points
                  show_arrow=False)



PLOT_OPTS = dict(     #the dimensions of figure is given
    height =450,
    width = 750,
    x_axis_type='log',
    x_range=[1300, 30000],
    y_range=(1500,30000)
)


p1 = figure( #how do u want the overall dimensions of fig
    title = str('2013 October Cost Incurred vs Revenue'),toolbar_location='above',  #title should always be in string format
    title_location = 'above',
    tools=[hover],
    **PLOT_OPTS)


p1.circle(
    x='x',y='y', #these have been wriiten before and is being called for the sake of hovering to work and is defined in update function
    size={'field':'profit_scaled'},  #we cant use the size of data point as population as the china population is one billion and all the pixels gets filled and hence we use a mapper and give the rangee of X and y axis
    color = {'field':'stores','transform':color_mapper}, #this will color all the regions defined by color_mapper
    legend='stores', #a legend of which color is what continent
    source=source,  #what is the data source
    alpha=0.9) #how much of transparency of data pojint

p1.legend.border_line_color = None  #to remove the border
p1.legend.location = (10,0)  #this is going to take legend out of the chart box
p1.right.append(p1.legend[0]) #this is going to place the legend to the right
show(p1, notebook_handle=True) #notebook_handle will take the consideration of viewing each year that we defined in update


/home/shwetakamal/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/api.py:107: RuntimeWarning: '<' not supported between instances of 'str' and 'int', sort order is undefined for incomparable objects
  result = result.union(other)
/home/shwetakamal/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/api.py:107: RuntimeWarning: '<' not supported between instances of 'int' and 'str', sort order is undefined for incomparable objects
  result = result.union(other)
Out[11]:

<Bokeh Notebook handle for In[11]>


In [12]:
interact(update_m, month=(0,47,1))


Out[12]:
<function __main__.update_m(month)>